#4. Hierarchical Cluster Analysis

#add libraries and source documents
#(data.stand, data.trans, hclus.table, hclus.cophenetic, hclus.scree, box.plots and clus.stats are not in the
# packages loaded below; they come from a separately sourced helper script)
library(vegan)
library(permute)
library(cluster)
library(pvclust)

envdata<-read.csv('env.csv',header=TRUE,row.names=1)
speabu<-read.csv('abund.csv',header=TRUE,row.names=1)

###compute the dissimilarity/distance matrix
#standardize the env data first (since the measurements are on different scales)
envdata.tra<-data.stand(envdata,method='standardize',margin='column',plot=F)
#calculate Euclidean distance
site.eucd<-vegdist(envdata.tra,method='euclidean')

###compute hierarchical clustering
#hierarchical agglomerative clustering on the dissimilarity matrix (many linkage options: complete, average, median, etc.)
sitecl.com<-hclust(site.eucd,method='complete')
sitecl.ave<-hclust(site.eucd,method='average')
sitecl.ward<-hclust(site.eucd,method='ward.D2')  #'ward' was renamed to 'ward.D'/'ward.D2' in newer versions of R

#now look at them..
#print a summary
hclus.table(sitecl.ave)
#plot a dendrogram
plot(sitecl.ave)
#make it fancier
plot(sitecl.ave,main='Average-linkage Dendrogram',
     xlab='Sites',ylab='Euclidean distance',hang=-1)

##DO IT AGAIN WITH A DIFFERENT DATASET - species abundance
#transform abundances to presence/absence (power transformation with exponent 0)
speocc<-data.trans(speabu[,-1],method='power',exp=0,plot=F)
#calculate the dissimilarity coefficient (Jaccard)
sp.jacd<-vegdist(speocc,method='jaccard')
#calculate the clusters (average linkage)
specl.ave<-hclust(sp.jacd,method='average')
#plot it
plot(specl.ave)
#calculate clusters on the env. distances using single linkage
sitecl.sin<-hclust(site.eucd,method='single')

##DECIDE ON NUMBER OF CLUSTERS TO RETAIN
#first go back and look at the plot of the env. data
plot(sitecl.ave)
#now tell it that you want 9 clusters (outlined in red)
rect.hclust(sitecl.ave,k=9)
#or you can decide based on height (distance)
#note: replot the dendrogram first or the rectangles just pile up on top of each other
plot(sitecl.ave)
rect.hclust(sitecl.ave,h=6)
#based on the analysis we now 'cut' the tree into clusters (this creates a vector 'sitecl.class' that
#contains the cluster memberships, which will be used in the next analysis)
sitecl.class<-cutree(sitecl.ave,k=9)
#look at it to make sure there are 9 clusters
sitecl.class

##CLUSTER DIAGNOSTICS - evaluating the cluster solution
#compute the agglomerative coefficient (>0.75 is considered good)
coef.hclust(sitecl.ave)
coef.hclust(sitecl.com)
#or compute the cophenetic correlation coefficient
cor(site.eucd,cophenetic(sitecl.ave))
#can also plot the cophenetic correlation
hclus.cophenetic(site.eucd,sitecl.ave)
#scree plot - looks at the dissimilarity between clusters; the 'elbow' is usually a good place to 'cut' the dendrogram
hclus.scree(sitecl.sin)
hclus.scree(sitecl.ave)

#Looking at the stability of clusters using bootstrapping
#BOOTSTRAP test: do your clusters remain intact if you take a random subsample of your objects with replacement?
#This function (usually used in DNA analysis) does that. Note: t() transposes the data because the function
#assumes objects are columns (not rows), and only 50 bootstraps (nboot) are run to save computation time
clus.stab<-pvclust(t(envdata.tra),method.hclust='average',method.dist='euclidean',nboot=50)
#now look at the plot and highlight significant clusters at a critical p<0.01
plot(clus.stab)
pvrect(clus.stab,alpha=0.99)

#once we have examined the clusters we may want to describe DIFFERENCES AMONG CLUSTERS,
#in particular univariate differences among clusters
#one option is to bind the cluster classes (9 of them, from above) to the env. data
envdata.new<-cbind(sitecl.class,envdata)
#the cluster (1-9) is now the first column of the matrix
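#OPTIONAL: a minimal base-R sketch for a first look at differences among clusters
#(uses only the sitecl.class vector and envdata created above): cluster sizes, then per-cluster means
table(sitecl.class)
aggregate(envdata,by=list(cluster=sitecl.class),FUN=mean)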
#now use boxplots to infer differences
box.plots(envdata.new,by='sitecl.class')
#a second option is to use a nonparametric Kruskal-Wallis rank test
clus.stats(envdata.new,sitecl.class)

#Extra things and different libraries/methods
#use agnes() instead of hclust()
sitecl.agnes<-agnes(envdata.tra,metric='euclidean',method='complete')
plot(sitecl.agnes)
#hierarchical divisive clustering
sitecl.dia<-diana(site.eucd)
#monothetic hierarchical divisive clustering,
#i.e. using a single descriptor as the basis for partitioning the data at each step;
#use the mona() function (it requires binary data, so use the presence/absence matrix)
sitecl.mona<-mona(speocc)
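#OPTIONAL: a minimal sketch for inspecting the divisive solutions; the cluster package provides
#plot methods for diana and mona objects (a banner plot, plus a dendrogram in the diana case)
plot(sitecl.dia,main='Divisive clustering (DIANA)')
plot(sitecl.mona,main='Monothetic divisive clustering (MONA)')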